suppressPackageStartupMessages({
    library(Seurat)
    library(venn)
    library(dplyr)
    library(cowplot)
    library(ggplot2)
    library(pheatmap)
    library(enrichR)
    library(rafalib)
    library(cowplot)
  library(gplots) 
})


plottheme = theme(legend.title = element_text(size = 8),
                  legend.text = element_text(size = 8),
                  axis.title = element_text(size = 8),
                  axis.text = element_text(size = 8),
                  plot.title = element_text(size = 8))


# library(future)
# plan("multiprocess", workers = 4)
# options(future.globals.maxSize = 24000 * 1024^2)

alldata <- readRDS("../analysis/filtered_EPI_int_clus.rds")

# Set desired clustering resolution
sel.lev = 1

# Set the identity as louvain with resolution selected above
sel.clust = paste0("CCA_snn_res.",sel.lev)

alldata$orig.ident = factor(alldata$orig.ident, levels = c("STD_EPI_d0","GW_EPI_d0"))
alldata$Treatment = factor(alldata$Treatment, levels = c("STD","GW"))

alldata@active.assay = "RNA"

alldata <- SetIdent(alldata, value = sel.clust)
table(alldata@active.ident)
alldata$ident_clust = paste0(alldata@active.ident,"_",alldata$orig.ident)


ggplotColours <- function(n = 6, h = c(0, 360) + 15){
  if ((diff(h) %% 360) < 1) h[2] <- h[2] - 360/n
  hcl(h = (seq(h[1], h[2], length = n)), c = 100, l = 65)
}
## 
##    0    1    2    3    4    5    6    7 
## 1644 1254 1244 1167  953  727  633  390

1 Used clustering

plot_grid(ncol = 2, DimPlot(alldata, label = T, shuffle = TRUE) + 
            NoAxes() + NoLegend(),
          DimPlot(alldata, group.by = "orig.ident", shuffle = TRUE) + 
            NoAxes() + theme(legend.position = "bottom"))

frequencies = as.data.frame(table(data.frame(cluster=alldata@meta.data[,sel.clust],
                 sample = alldata$orig.ident)))
frequencies$Proportion=NA
for(i in 1:nrow(frequencies)){
  frequencies$Proportion[i] = frequencies$Freq[i]/sum(frequencies$Freq[frequencies$sample==frequencies$sample[i]])
}

plot_grid(ggplot(alldata@meta.data, aes_string(fill= sel.clust, x = "orig.ident")) + plottheme + 
  geom_bar(position = "fill", color = "black") +
  theme(panel.background = element_blank(),axis.ticks = element_blank(), axis.title.y = element_blank()),

ggplot(frequencies, aes(fill=sample, y = Proportion, x = cluster)) + plottheme + 
  geom_bar(stat = "identity",position="dodge") +
  theme(panel.background = element_blank(),axis.ticks.y = element_blank()), rel_widths = c(2,3))

DotPlot(alldata, features = c("Ptprc","Epcam"), 
    assay = "RNA") + coord_flip() + plottheme

2 Cluster marker genes

Top 25 (by p-value) genes with logFC>0.2 for each cluster, sorted by logFC:

if(file.exists(paste0("../analysis/clustermarkers_EPI_CCA_",sel.lev,".rds"))){
  markers_genes = readRDS(paste0("../analysis/clustermarkers_EPI_CCA_",sel.lev,".rds"))
}else{
  markers_genes <- FindAllMarkers(alldata, logfc.threshold = 0.2, test.use = "wilcox", 
  min.pct = 0.1, min.diff.pct = 0.2, only.pos = TRUE, max.cells.per.ident = 300, 
  assay = "RNA", random.seed = 42)
  saveRDS(markers_genes, paste0("../analysis/clustermarkers_EPI_CCA_",sel.lev,".rds"))
}


top25 <- markers_genes %>% group_by(cluster) %>% top_n(-25, p_val)
top25 <- top25 %>% group_by(cluster) %>% top_n(25, avg_log2FC)


mypar(2, 5, mar = c(4, 6, 3, 1))
for (i in unique(top25$cluster)) {
    barplot(sort(setNames(top25$avg_log2FC, top25$gene)[top25$cluster == i], F), horiz = T, 
        las = 1, main = paste0(i, " vs. rest"), border = "white", yaxs = "i")
    abline(v = c(0, 0.25), lty = c(1, 2))
}

Top 5 (by p-value) genes with logFC>0.2 for each cluster:

top5 <- markers_genes %>% group_by(cluster) %>% top_n(-5, p_val_adj)
top5 <- top5 %>% group_by(cluster) %>% top_n(5, avg_log2FC)

alldata <- ScaleData(alldata, features = as.character(unique(top5$gene)), assay = "RNA")
DoHeatmap(alldata, features = as.character(unique(top5$gene)), group.by = sel.clust, 
    assay = "RNA") + plottheme

#pdf(paste0("../figures/clustermarkers_EPI_",sel.lev,".pdf"), width = 10, height = 10)
#DoHeatmap(alldata, features =  as.character(unique(top5$gene)), assay ="RNA",label = FALSE) + labs(color = "Cluster")
#dev.off()


DotPlot(alldata, features = rev(as.character(unique(top5$gene))), 
    assay = "RNA") + coord_flip() + plottheme

# topAll = markers_genes %>% group_by(cluster)
# DoHeatmap(alldata, features =  as.character(unique(topAll$gene)), assay ="RNA",label = FALSE) + labs(color = "Cluster") 


# DoHeatmap(alldata, features = as.character(unique(top5$gene)), group.by = sel.clust,
#     assay = "RNA", label = FALSE)
# 
# DotPlot(alldata, features = rev(as.character(unique(top5$gene))), group.by = sel.clust, 
#     assay = "RNA") + coord_flip()

Top 3 (by p-value) genes with logFC>0.2 for each cluster:

top3 <- top5 %>% group_by(cluster) %>% top_n(-3, p_val)
top3 <- top3 %>% group_by(cluster) %>% top_n(3, avg_log2FC)

VlnPlot(alldata, features = as.character(unique(top3$gene)), ncol = 5, group.by = sel.clust, 
    assay = "RNA", pt.size = -1)

plot_grid(plotlist = lapply(FeaturePlot(alldata, features = as.character(unique(top3$gene)), 
    slot = "data", combine = FALSE), function(x){x + NoAxes() + plottheme + 
        theme(legend.key.width = unit(5, "pt"))}), ncol = 4)

3 Annotations

Based on the top DE genes, here are my predctions for the cell types represented by each cluster:

  • Cluster 0: Enterocytes
  • Cluster 1: Immune cells
  • Cluster 2: Stem cells
  • Cluster 3: Proliferating stem cells
  • Cluster 4: Immune cells
  • Cluster 5: Other (low quality enterocytes, enteroendocrine)
  • Cluster 6: Enterocytes (Apoc2, Apoa1, Aldh1a1)
  • Cluster 7: Goblet cells

4 Diet comparisons

For each cluster, here are the DE genes between the two treatments:

DGE_list_Treatment = list()

for(i in sort(unique(alldata@active.ident))){
  # select all cells in cluster i
  cell_selection <- subset(alldata, cells = colnames(alldata)[alldata@active.ident == 
      i])
  cell_selection <- SetIdent(cell_selection, value = "Treatment")
  maxcells = 50
  if(min(table(cell_selection@active.ident))<maxcells){
    maxcells = min(table(cell_selection@active.ident))
  }
  # Compute differentiall expression
  DGE_cell_selection <- FindAllMarkers(cell_selection, logfc.threshold = 0.2, test.use = "wilcox", 
      min.pct = 0.1, only.pos = TRUE, max.cells.per.ident = maxcells, 
      assay = "RNA")
  DGE_list_Treatment[[paste("Cluster",i)]] = DGE_cell_selection[DGE_cell_selection$p_val_adj<0.05,]
}

for(i in sort(unique(alldata@active.ident))){
  top20_cell_selection <- DGE_list_Treatment[[paste("Cluster",i)]]
  # print(VlnPlot(subset(alldata, idents = i),unique(top20_cell_selection$gene), 
  #         group.by = "orig.ident", assay = "RNA") + theme(axis.text = element_text(size = 1)))
  if(nrow(top20_cell_selection)>0){
      print(DotPlot(subset(alldata, idents = i), features = rev(as.character(unique(top20_cell_selection$gene))), 
      assay = "RNA", group.by = "Treatment") + coord_flip() + plottheme +
        labs(title = paste("Cluster",i,paste(top3$gene[top3$cluster==i],collapse = ", "))))
  
  }
}